indicator_2

library(tidyverse)
library(readxl)
library(rmapshaper)
library(sf)
library(mapview)
library(tmap)
library(lintr)
library(leaflet)
library(RColorBrewer)
library(plotly)
#mortality_rate_unintentional_poisoning
mortality_rate_unintentional_poisoning <- read_csv("./data/indicator_3.9.3.csv",show_col_types = FALSE)

#proportion-of-population-with-basic-handwashing-facilities-on-premises-by-urban-rural-percent
population_with_basic_handwashing_facilities<- read_csv("./data/indicator_6.2.1.csv",show_col_types = FALSE)
mortality_rate_unintentional_poisoning <- select(
  mortality_rate_unintentional_poisoning,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  gender = 'sex_desc',
  mortality_rate_unintentional_poisoning_2019 = 'value_2019'
)

# drop rows if contains at least one NA value
mortality_rate_unintentional_poisoning <- na.omit(mortality_rate_unintentional_poisoning)

#check for empty row
any(is.na(mortality_rate_unintentional_poisoning))
[1] FALSE
mortality_rate_unintentional_poisoning
# A tibble: 597 × 6
    code country     region                  iso   gender mortality_rate_unint…¹
   <dbl> <chr>       <chr>                   <chr> <chr>                   <dbl>
 1   470 Malta       Southern Europe         MLT   Both …                    0.1
 2   152 Chile       South America           CHL   Male                      0.5
 3     4 Afghanistan Southern Asia (excludi… AFG   Both …                    1  
 4   470 Malta       Southern Europe         MLT   Female                    0  
 5   156 China       Eastern Asia            CHN   Both …                    1.8
 6     4 Afghanistan Southern Asia (excludi… AFG   Both …                    1  
 7   470 Malta       Southern Europe         MLT   Male                      0.2
 8   156 China       Eastern Asia            CHN   Female                    1.5
 9   478 Mauritania  Western Africa          MRT   Both …                    1.5
10   478 Mauritania  Western Africa          MRT   Female                    1.3
# ℹ 587 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# Create the histogram
mortality_histo <- ggplot(mortality_rate_unintentional_poisoning, aes(x=mortality_rate_unintentional_poisoning_2019)) +
  geom_histogram(binwidth=0.1, fill='blue', color='black') +
  labs(title="Mortality rate unintentional poisoning for Year 2019", x="Mortality Rate (per 100,000)", y="Number of Country")

mortality_histo

population_with_basic_handwashing_facilities <- select(
  population_with_basic_handwashing_facilities,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  area = 'location_desc',
  pop_with_basic_handwashing_facilites_2019 = 'value_2019'
)

# drop rows if contains at least one NA value
population_with_basic_handwashing_facilities <- na.omit(population_with_basic_handwashing_facilities)

#check for empty row
any(is.na(mortality_rate_unintentional_poisoning))
[1] FALSE
population_with_basic_handwashing_facilities
# A tibble: 284 × 6
    code country      region                  iso   area  pop_with_basic_handw…¹
   <dbl> <chr>        <chr>                   <chr> <chr>                  <dbl>
 1     4 Afghanistan  Southern Asia (excludi… AFG   All …                     38
 2   356 India        Southern Asia           IND   Urban                     82
 3     4 Afghanistan  Southern Asia (excludi… AFG   All …                     38
 4   360 Indonesia    South-Eastern Asia      IDN   All …                     94
 5     4 Afghanistan  Southern Asia (excludi… AFG   Rural                     29
 6   694 Sierra Leone Western Africa          SLE   Rural                     18
 7   360 Indonesia    South-Eastern Asia      IDN   Rural                     91
 8     4 Afghanistan  Southern Asia (excludi… AFG   Rural                     29
 9     4 Afghanistan  Southern Asia (excludi… AFG   Urban                     64
10     4 Afghanistan  Southern Asia (excludi… AFG   Urban                     64
# ℹ 274 more rows
# ℹ abbreviated name: ¹​pop_with_basic_handwashing_facilites_2019
# Create the histogram
pop_with_handwashing_facilities_histo <- ggplot(population_with_basic_handwashing_facilities, aes(x=pop_with_basic_handwashing_facilites_2019)) +
  geom_histogram(binwidth=5, fill='blue', color='black') +
  labs(title="Population with basic handwashing facilities for Year 2019", x="Population (%)", y="Number of Country")

pop_with_handwashing_facilities_histo

countries_tb <- left_join(
  mortality_rate_unintentional_poisoning,
  population_with_basic_handwashing_facilities,
  by = join_by(country, iso)
) |>
  mutate(
    iso = case_match(
      iso,
      "COD" ~ "ZAR",
      "ROU" ~ "ROM",
      "TLS" ~ "TMP",
      "XKX" ~ "KSV",
      .default = iso
    )
  )
Warning in left_join(mortality_rate_unintentional_poisoning, population_with_basic_handwashing_facilities, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
countries_tb
# A tibble: 1,323 × 10
   code.x country   region.x iso   gender mortality_rate_unint…¹ code.y region.y
    <dbl> <chr>     <chr>    <chr> <chr>                   <dbl>  <dbl> <chr>   
 1    470 Malta     Souther… MLT   Both …                    0.1     NA <NA>    
 2    152 Chile     South A… CHL   Male                      0.5     NA <NA>    
 3      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 4      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 5      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 6      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 7      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 8      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 9    470 Malta     Souther… MLT   Female                    0       NA <NA>    
10    156 China     Eastern… CHN   Both …                    1.8     NA <NA>    
# ℹ 1,313 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: area <chr>,
#   pop_with_basic_handwashing_facilites_2019 <dbl>
countries_sf <- read_sf("./data/WB_countries_Admin0.geojson")
land_sf <- read_sf("./data/WB_Land.geojson")
countries_sf <-
  countries_sf |>
  ms_simplify() |> # Default argument `keep = 0.05`
  st_make_valid()
land_sf <- ms_simplify(land_sf)
npts(countries_sf)
[1] 30826
countries_sf <-
  countries_sf |>
  left_join(
    countries_tb,
    by = join_by(WB_A3 == iso)
  ) |>
  select(country, continent = 'CONTINENT', gender = 'gender', area = 'area', iso = WB_A3, mortality_rate_unintentional_poisoning_2019, pop_with_basic_handwashing_facilites_2019)
countries_sf
Simple feature collection with 1246 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -180 ymin: -55.62754 xmax: 179.9038 ymax: 83.6341
Geodetic CRS:  WGS 84
# A tibble: 1,246 × 8
   country   continent gender     area      iso   mortality_rate_unintentional…¹
   <chr>     <chr>     <chr>      <chr>     <chr>                          <dbl>
 1 Indonesia Asia      Both sexes All areas IDN                              0.3
 2 Indonesia Asia      Both sexes Rural     IDN                              0.3
 3 Indonesia Asia      Both sexes Urban     IDN                              0.3
 4 Indonesia Asia      Female     All areas IDN                              0.1
 5 Indonesia Asia      Female     Rural     IDN                              0.1
 6 Indonesia Asia      Female     Urban     IDN                              0.1
 7 Indonesia Asia      Male       All areas IDN                              0.5
 8 Indonesia Asia      Male       Rural     IDN                              0.5
 9 Indonesia Asia      Male       Urban     IDN                              0.5
10 Malaysia  Asia      Both sexes <NA>      MYS                              0.7
# ℹ 1,236 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
polygon <- st_polygon(x = list(rbind(
  c(-180.0001, 90),
  c(-179.9999, 90),
  c(-179.9999, -90),
  c(-180.0001, -90),
  c(-180.0001, 90)
))) |>
  st_sfc() |>
  st_set_crs(4326) # Equirectangular projection
countries_sf <- mutate(
  countries_sf,
  geometry = st_difference(geometry, polygon)
) |>
  st_make_valid()
tm_shape(countries_sf) + tm_polygons()

countries_sf$area <- replace_na(countries_sf$area, "missing")
choropleth_pop <-
  tm_shape(land_sf, projection = "ESRI:54012") +
  tm_polygons(col = "grey") +
  tm_shape(countries_sf) +
  tm_polygons(
    col = "pop_with_basic_handwashing_facilites_2019",
    border.col = "grey30",
    palette = "Greens",
    breaks = c(-Inf, 20, 40, 60, 80, 100, Inf),
    colorNA = "grey",
    title = "Population (%)",
    lwd = 0.5
  ) +
  tm_text("iso", size = "AREA") +  # Use the new column to set the size
  tm_credits(
    c("", "Source: unstats-undesa.opendata.arcgis.com"),
    position = c("right", "bottom")
  ) +
  tm_layout(
    bg.color = "lightblue",
    frame = FALSE,
    inner.margins = c(0.00, 0.2, 0.2, 0.1),
    earth.boundary = TRUE,
    space.color = "white",
    main.title.size = 0.8,
    title.size = 0.5,
    main.title = "Population with basic handwashing facilities by Country for 2019"
  )

choropleth_pop

countries_sf$gender <- replace_na(countries_sf$gender, "missing")
choropleth_mor<-
  tm_shape(land_sf, projection = "ESRI:54012") +
  tm_polygons(col = "grey") +
  tm_shape(countries_sf) +
  tm_polygons(
    col = "mortality_rate_unintentional_poisoning_2019",
    border.col = "grey30",
    palette = "Oranges",
    breaks = c(-Inf, 0.2, 0.4, 0.6, 0.8, 1, Inf),
    colorNA = "grey",
    title = "Mortality Rate \nper 100,000",
    lwd = 0.5
  ) + 
  tm_text("iso", size = "AREA") +
  tm_credits(
    c("", "Source: unstats-undesa.opendata.arcgis.com"),
    position = c("right", "bottom")
  ) +
  tm_layout(
    bg.color = "lightblue",
    frame = FALSE,
    inner.margins = c(0.00, 0.2, 0.2, 0.1),
    earth.boundary = TRUE,
    space.color = "white",
    main.title.size = 0.8,
    title.size = 0.5,
    main.title = "Mortality Rate Unintentional Poisoning by Country for 2019"
  )

choropleth_mor

countries_sf_all_areas <- countries_sf[countries_sf$area == "All areas",]
countries_sf_urban <- countries_sf[countries_sf$area == "Urban",]
countries_sf_rural <- countries_sf[countries_sf$area == "Rural",]

countries_sf_all_areas
Simple feature collection with 324 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -172.7497 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 324 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … All … IDN                      0.3
 2 Indonesia                 Asia      Female All … IDN                      0.1
 3 Indonesia                 Asia      Male   All … IDN                      0.5
 4 Bolivia (Plurinational S… South Am… Both … All … BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Female All … BOL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   All … BOL                      0.6
 7 India                     Asia      Both … All … IND                      0.3
 8 India                     Asia      Female All … IND                      0.2
 9 India                     Asia      Male   All … IND                      0.3
10 Ethiopia                  Africa    Both … All … ETH                      3.3
# ℹ 314 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_urban
Simple feature collection with 306 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -157.449 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 306 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … Urban IDN                      0.3
 2 Indonesia                 Asia      Female Urban IDN                      0.1
 3 Indonesia                 Asia      Male   Urban IDN                      0.5
 4 Bolivia (Plurinational S… South Am… Both … Urban BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Female Urban BOL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   Urban BOL                      0.6
 7 India                     Asia      Both … Urban IND                      0.3
 8 India                     Asia      Female Urban IND                      0.2
 9 India                     Asia      Male   Urban IND                      0.3
10 Ethiopia                  Africa    Both … Urban ETH                      3.3
# ℹ 296 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_rural
Simple feature collection with 309 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -157.449 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 309 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … Rural IDN                      0.3
 2 Indonesia                 Asia      Female Rural IDN                      0.1
 3 Indonesia                 Asia      Male   Rural IDN                      0.5
 4 Bolivia (Plurinational S… South Am… Both … Rural BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Female Rural BOL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   Rural BOL                      0.6
 7 Peru                      South Am… Both … Rural PER                      0.4
 8 Peru                      South Am… Female Rural PER                      0.3
 9 Peru                      South Am… Male   Rural PER                      0.5
10 India                     Asia      Both … Rural IND                      0.3
# ℹ 299 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
# Get a color palette with three colors
colors <- brewer.pal(3, "Set1")

# Assign colors to the categories
colorpal_area <- colorFactor(colors, 
                               levels = c("All areas", "Urban", "Rural"))
leaflet(countries_sf_all_areas) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_area(area),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
                             "<br>Area: ", area)) %>%
  addLegend(pal = colorpal_area, 
            values = ~area, 
            title = "Area", 
            position = "bottomright")
leaflet(countries_sf_urban) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_area(area),  # use colorpal_area here
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  # This adds labels that appear when you hover over a country
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
                             "<br>Area: ", area)) %>%
  addLegend(pal = colorpal_area, 
            values = ~area, 
            title = "Area", 
            position = "bottomright")
leaflet(countries_sf_rural) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_area(area),  # use colorpal_area here
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  # This adds labels that appear when you hover over a country
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
                             "<br>Area: ", area)) %>%
  addLegend(pal = colorpal_area, 
            values = ~area, 
            title = "Area", 
            position = "bottomright")
countries_sf_male <- countries_sf[countries_sf$gender == "Male",]
countries_sf_female <- countries_sf[countries_sf$gender == "Female",]
countries_sf_both <- countries_sf[countries_sf$gender == "Both sexes",]

countries_sf_male
Simple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS:  WGS 84
# A tibble: 410 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Male   All … IDN                      0.5
 2 Indonesia                 Asia      Male   Rural IDN                      0.5
 3 Indonesia                 Asia      Male   Urban IDN                      0.5
 4 Malaysia                  Asia      Male   miss… MYS                      1  
 5 Chile                     South Am… Male   miss… CHL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   All … BOL                      0.6
 7 Bolivia (Plurinational S… South Am… Male   Rural BOL                      0.6
 8 Bolivia (Plurinational S… South Am… Male   Urban BOL                      0.6
 9 Peru                      South Am… Male   Rural PER                      0.5
10 Argentina                 South Am… Male   miss… ARG                      0.5
# ℹ 400 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_female
Simple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS:  WGS 84
# A tibble: 410 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Female All … IDN                      0.1
 2 Indonesia                 Asia      Female Rural IDN                      0.1
 3 Indonesia                 Asia      Female Urban IDN                      0.1
 4 Malaysia                  Asia      Female miss… MYS                      0.3
 5 Chile                     South Am… Female miss… CHL                      0.2
 6 Bolivia (Plurinational S… South Am… Female All … BOL                      0.5
 7 Bolivia (Plurinational S… South Am… Female Rural BOL                      0.5
 8 Bolivia (Plurinational S… South Am… Female Urban BOL                      0.5
 9 Peru                      South Am… Female Rural PER                      0.3
10 Argentina                 South Am… Female miss… ARG                      0.3
# ℹ 400 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_both
Simple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS:  WGS 84
# A tibble: 410 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … All … IDN                      0.3
 2 Indonesia                 Asia      Both … Rural IDN                      0.3
 3 Indonesia                 Asia      Both … Urban IDN                      0.3
 4 Malaysia                  Asia      Both … miss… MYS                      0.7
 5 Chile                     South Am… Both … miss… CHL                      0.4
 6 Bolivia (Plurinational S… South Am… Both … All … BOL                      0.6
 7 Bolivia (Plurinational S… South Am… Both … Rural BOL                      0.6
 8 Bolivia (Plurinational S… South Am… Both … Urban BOL                      0.6
 9 Peru                      South Am… Both … Rural PER                      0.4
10 Argentina                 South Am… Both … miss… ARG                      0.4
# ℹ 400 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
# Get a color palette with three colors
colors <- brewer.pal(3, "Set1")

# Assign colors to the categories
colorpal_gender <- colorFactor(colors, 
                               levels = c("Female", "Male", "Both sexes"))
# Replace NAs in 'gender' column
countries_sf_male$gender <- replace_na(countries_sf_male$gender, "missing")

leaflet(countries_sf_male) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_gender(gender),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
                             "<br>Gender: ", gender)) %>%
  addLegend(pal = colorpal_gender, 
            values = ~gender, 
            title = "Gender", 
            position = "bottomright")
# Replace NAs in 'gender' column
countries_sf_female$gender <- replace_na(countries_sf_female$gender, "missing")

leaflet(countries_sf_female) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_gender(gender),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
                             "<br>Gender: ", gender)) %>%
  addLegend(pal = colorpal_gender, 
            values = ~gender, 
            title = "Gender", 
            position = "bottomright")
# Replace NAs in 'gender' column
countries_sf_both$gender <- replace_na(countries_sf_both$gender, "missing")

leaflet(countries_sf_both) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_gender(gender),  # Assign color based on gender
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
                             "<br>Gender: ", gender)) %>%
  addLegend(pal = colorpal_gender, 
            values = ~gender, 
            title = "Gender", 
            position = "bottomright")
countries_sf <- countries_sf[!is.na(countries_sf$mortality_rate_unintentional_poisoning_2019) & !is.na(countries_sf$pop_with_basic_handwashing_facilites_2019), ]

countries_sf <- countries_sf %>%
  filter(gender == 'Both sexes')

countries_sf
Simple feature collection with 313 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -172.7497 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 313 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
 * <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … All … IDN                      0.3
 2 Indonesia                 Asia      Both … Rural IDN                      0.3
 3 Indonesia                 Asia      Both … Urban IDN                      0.3
 4 Bolivia (Plurinational S… South Am… Both … All … BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Both … Rural BOL                      0.6
 6 Bolivia (Plurinational S… South Am… Both … Urban BOL                      0.6
 7 Peru                      South Am… Both … Rural PER                      0.4
 8 India                     Asia      Both … Urban IND                      0.3
 9 India                     Asia      Both … All … IND                      0.3
10 India                     Asia      Both … Rural IND                      0.3
# ℹ 303 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
# Plot the data
plot_loess <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, y = pop_with_basic_handwashing_facilites_2019)) +
  
  geom_point(aes(color = area, 
                 shape = area,
                 text = paste("Country:", country, "<br>", "Location:", area)), alpha = 0.7) +
  
  geom_smooth(data = subset(countries_sf, area == "All areas"),
              method = loess, se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  geom_smooth(data = subset(countries_sf, area == "Urban"),
              method = loess, se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +

  geom_smooth(data = subset(countries_sf, area == "Rural"),
              method = loess, se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  labs(x = "Mortality Rate (deaths per 100,000 population) for 2019 ",
       y = "Population with Basic Handwashing Facilities for 2019 (%)",
       title = "Correlation between Mortality Rate and Basic Handwashing Facilities",
       color = "Location Type",
       shape = "Location Type") +
  
  theme_minimal() +
  
  guides(color = guide_legend(override.aes = list(shape = 16)),
         shape = guide_legend(override.aes = list(color = "black")))
Warning in geom_point(aes(color = area, shape = area, text = paste("Country:",
: Ignoring unknown aesthetics: text
# Convert to a plotly plot
gp <- ggplotly(plot_loess, tooltip = "text")
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp

Correlation Empty Plot

correlation_all <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "All areas"], 
                       countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "All areas"])

correlation_urban <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Urban"], 
                         countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "Urban"])

correlation_rural <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Rural"], 
                         countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "Rural"])

blank_plot <- ggplot() +
  theme_void() +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  annotate("text",
           x = 0.5,
           y = 0.5,
           label = paste("Correlation All:",
                         round(correlation_all, 2), "\n",
                         "Correlation Urban:",
                         round(correlation_urban, 2), "\n",
                         "Correlation Rural:",
                         round(correlation_rural, 2)),
           size = 5,
           color = "black",
           hjust = 0.5)

# Convert to a plotly plot
gp_text <- ggplotly(blank_plot)

gp_text

1.5 Histogram (without Transformation)

stacked_bar_chart <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, fill = area)) +
  geom_bar(position = "stack") +
  labs(x = "Mortality Rate (deaths per 100,000 population)",
       y = "Count",
       title = "Stacked Bar Chart of Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart)

# Create a grouped bar chart
grouped_bar_chart <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, fill = area)) +
  geom_bar(position = "dodge") +
  labs(x = "Mortality Rate (deaths per 100,000 population)",
       y = "Count",
       title = "Grouped Bar Chart of Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart)
ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart, width = 1000, height = 600, autosize = TRUE)
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart, width = 1000, height = 600, autosize = TRUE)

ggplot_stacked_bar_chart
ggplot_grouped_bar_chart

Normalise Data

#Ensure you have the MASS library installed

countries_sf$mortality_rate_unintentional_poisoning_2019.positive <- countries_sf$mortality_rate_unintentional_poisoning_2019 + abs(min(countries_sf$mortality_rate_unintentional_poisoning_2019)) + 0.1
# Apply the Box-Cox transformation
bc_result <- MASS::boxcox(countries_sf$mortality_rate_unintentional_poisoning_2019.positive ~ 1, 
                    lambda = seq(-3,3,0.1))

# The optimal lambda value is the one that maximizes the log-likelihood
optimal_lambda <- bc_result$x[which.max(bc_result$y)]

# Transform the data using the optimal lambda value
if (optimal_lambda == 0) {
  countries_sf$latest_value.x_bc <- log(countries_sf$mortality_rate_unintentional_poisoning_2019.positive)
} else {
  countries_sf$latest_value.x_bc <- (countries_sf$mortality_rate_unintentional_poisoning_2019.positive^optimal_lambda - 1) / optimal_lambda
}

# Shift the transformed variable to be non-negative
min_value <- min(countries_sf$latest_value.x_bc)
countries_sf$latest_value.x_bc <- countries_sf$latest_value.x_bc + abs(min_value) + 0.1

Histogram Normalised

# Round the transformed mortality rates to the nearest whole number
countries_sf$latest_value.x_bc_rounded <- round(countries_sf$latest_value.x_bc)

# Convert the latest_value.x_bc_rounded variable into a categorical variable
countries_sf$latest_value.x_bc_cat <- cut(countries_sf$latest_value.x_bc_rounded, breaks = 50)

# Create a stacked bar chart
stacked_bar_chart_bc <- ggplot(countries_sf, aes(x = latest_value.x_bc_rounded, fill = area)) +
  geom_bar(position = "stack", bins =20) +
  labs(x = "Transformed Mortality Rate",
       y = "Count",
       title = "Stacked Bar Chart of Transformed Mortality Rates") +
  theme_minimal()
Warning in geom_bar(position = "stack", bins = 20): Ignoring unknown
parameters: `bins`
ggplot_stacked_bar_chart_bc <- ggplotly(stacked_bar_chart_bc)
ggplot_stacked_bar_chart_bc
# Create a grouped bar chart
grouped_bar_chart_bc <- ggplot(countries_sf, aes(x = latest_value.x_bc_rounded, fill = area)) +
  geom_bar(position = "dodge") +
  labs(x = "Transformed Mortality Rate",
       y = "Count",
       title = "Grouped Bar Chart of Transformed Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_grouped_bar_chart_bc <- ggplotly(grouped_bar_chart_bc)
ggplot_grouped_bar_chart_bc

Scatter plot Normalised

plot_loess <- ggplot(countries_sf, aes(x = latest_value.x_bc, y = pop_with_basic_handwashing_facilites_2019)) +
  
  geom_point(aes(color = area, 
                 shape = area,
                 text = paste("Country:", country, "<br>", "Location:", area)), alpha = 0.7) +
  
  geom_smooth(data = subset(countries_sf, area == "All areas"),
              method = loess, se = FALSE, 
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  geom_smooth(data = subset(countries_sf, area == "Urban"),
              method = loess,
              se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +

  geom_smooth(data = subset(countries_sf, area == "Rural"),
              method = loess,
              se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  labs(x = "Transformed Mortality Rate (deaths per 100,000 population) for 2019",
       y = "Population with Basic Handwashing Facilities for 2019 (%)",
       title = "Correlation between Mortality Rate and Basic Handwashing Facilities (Normalised)",
       color = "Location Type",
       shape = "Location Type") +
  
  theme_minimal() +
  
  guides(color = guide_legend(override.aes = list(shape = 16)),
         shape = guide_legend(override.aes = list(color = "black")))
Warning in geom_point(aes(color = area, shape = area, text = paste("Country:",
: Ignoring unknown aesthetics: text
gp_normalised <- ggplotly(plot_loess, tooltip = "text")
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp_normalised
correlation_all <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "All areas"], 
                       countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "All areas"])

correlation_urban <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "Urban"], 
                         countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Urban"])

correlation_rural <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "Rural"], 
                         countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Rural"])

blank_plot <- ggplot() +
  theme_void() +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  annotate("text",
           x = 0.5,
           y = 0.5,
           label = paste("Correlation All:",
                         round(correlation_all, 2), "\n",
                         "Correlation Urban:",
                         round(correlation_urban, 2), "\n",
                         "Correlation Rural:",
                         round(correlation_rural, 2)),
           size = 5,
           color = "black",
           hjust = 0.5)

gp_text_normalised <- ggplotly(blank_plot)
gp_text_normalised

Combining normal and unormalised plots

combined_plot <- subplot(
  gp, gp_text, 
  gp_normalised, gp_text_normalised, 
  nrows = 2, margin = 0.05
)

# Print the combined plot
combined_plot